* -----------------------------------------------------------------------------
* Session setup and project folder globals.
* -----------------------------------------------------------------------------
clear all
set more on
set matsize 800
capture log close

* NOTE(review): global E is not defined in the visible part of this script;
* presumably it is set elsewhere (e.g. a profile do-file) -- confirm.
cd "$E"

* Root folder of the project; the remaining folder globals are built from it.
global path "E:\Project1_empirical"
* census data
global census "$path\Census"
* city yearbook
global city "$path\City"
* IV dataset
global iv "$path\IV"
* CHIPs dataset
global chips "$path\CHIPs"
* working data used in the regressions
global working_data "$path\5th Edition (Age 20 to 30) Mean Controls\Working Data"
* mean vars over past years
global citynew "$path\City\Raw data"

*adopath +   "$path\adofiles\plus"  // ado files 
*cd "$F"                            // change CD to F disk

*——————————————————————————————————————————————————————————————————————————————*

                                ** IV merge **
                               ** Data: fine1 **				
* Load the province-by-birth-year file of policy variables, keep premium,
* fine and bonus, and pad it with all-zero rows for birth years 1972-1978 in
* each listed province code. The result is declared as a panel and saved as
* fine1 in the working-data folder.
use "$iv\fines.dta", clear

keep birthyear province premium fine bonus
rename birthyear yr
rename province pv
*dataex in 1/10
/*
bysort pv (yr) : gen byte first = _n == 1
expand 8 if first
sort pv yr
replace yr = yr - 7
bys pv: replace yr = yr[_n-1] + 1 if _n>1

local plus1 = _N + 7
set obs `plus1'
gsort -yr 
replace yr = yr[_n-1] - 1 if yr==.
sort yr
*/

*egen tag=group(province)

* Append one row per (province code, birth year 1972-1978) with premium,
* fine and bonus set to 0. Values are assigned by variable name, so the
* result does not depend on the storage order of the variables in fines.dta
* (a bare -input- with no varlist fills columns in storage order).
* NOTE(review): the province list is hard-coded (31 two-digit codes); it is
* assumed to match the pv values already present in fines.dta -- confirm.
local provs 11 12 13 14 15 21 22 23 31 32 33 34 35 36 37 41 42 43 44 45 46 50 51 52 53 54 61 62 63 64 65
local first_new = _N + 1
foreach p of local provs {
    forvalues y = 1972/1978 {
        local row = _N + 1
        quietly set obs `row'
        quietly replace yr = `y' in `row'
        quietly replace pv = `p' in `row'
    }
}
foreach v of varlist premium fine bonus {
    quietly replace `v' = 0 in `first_new'/l
}

* xtset stops with an error if any (pv, yr) pair is repeated, so it also
* guards against padding years that already exist in the source file.
xtset pv yr
save "$working_data\fine1", replace 

********************************************************************************
********************************************************************************
******************************************************************************** 

                           ** Merge for Reg1 and IV **
                           **      Data:coredu      **

*——————————————————————————————————————————————————————————————————————————————*
************************************* 2002 *************************************
* 2002 wave: collapse the micro data in temp02 to one row per prefecture with
* the hedu/wedu correlation, the Gini of hhwage, log percentile gaps
* (90/10, 80/20) and the median of hhwage.
use "$working_data\temp02", clear

/* prefecture-level correlation between hedu and wedu */
tempfile f1
sort prefect, stable
save `f1'
* statsby replaces the data in memory with one row per prefect holding r(rho).
statsby cor_prefect=r(rho) , by(prefect) : corr hedu wedu
* Attach the correlation back to every micro record. The master (statsby
* output) has one row per prefect, so this is a 1:m match merge; it replaces
* the deprecated old-style "merge prefect using" call.
merge 1:m prefect using `f1', nogenerate

*———————————————————————————————— Inequality ———————————————————————————————————
* inequal() is not a built-in egen function; it must be installed separately.
egen gini = inequal(hhwage), by(prefect) index(gini) // gini //

* Detailed per-prefecture summary of hhwage for the log (shown once; the
* original repeated the identical command before each percentile pair).
bys prefect: sum hhwage, detail

    ** income by 90 percentile, 10 percentile **
bys prefect: egen wage10=pctile(hhwage), p(10)
bys prefect: egen wage90=pctile(hhwage), p(90)

gen wage9010 = log(wage90)-log(wage10) 

    ** income by 80 percentile, 20 percentile **
bys prefect: egen wage20=pctile(hhwage), p(20)
bys prefect: egen wage80=pctile(hhwage), p(80)

gen wage8020 = log(wage80)-log(wage20) 

    ** median income  **
bys prefect: egen mwage = pctile(hhwage), p(50)
	
* Every statistic generated above is constant within prefect, so (first)
* simply picks that value.
* NOTE(review): year is taken from the micro data and is assumed to be
* constant within prefect -- confirm.
collapse (first) cor_prefect year gini wage9010 wage8020 mwage, by(prefect) 
// author's note: 54 prefectures //

save "$working_data\coredu02", replace 

*———————————————————————— merge sex ratio 2002 —————————————————————————————————
* 2002 wave: derive prefecture-level sex ratio and population shares from the
* 2000 census file and merge them onto coredu02.
use "$census\2000_county.dta", clear

* Keep only rows whose code has "00" in positions 5-6. The third argument of
* substr() is a length, not an end position, so 2 is the correct value (the
* original passed 6, which only worked because codes are 6 characters long).
* NOTE(review): presumably these are the prefecture-level rows -- confirm.
tostring code, gen(p)
keep if substr(p,5,2)=="00"

* The original ran the next two generate commands under "bys code:", whose
* only effect (each code being unique) is to sort by code; do that explicitly.
sort code

* 2/5 of the 15-19 band + all of 20-24 + 4/5 of 25-29, i.e. roughly ages
* 18 to 28 in the census year if ages are spread evenly within a band.
gen malepref=(male_15_19/5)*2+male_20_24+(male_25_29/5)*4 //18 to 28//

gen femalepref=(female_15_19/5)*2+female_20_24+(female_25_29/5)*4 

gen srpref=malepref/femalepref // male/female ratio of the cohort aged 20 to 30 in 2002 //

* NOTE(review): the varlist range male_0-female_85 takes every variable stored
* between those two names, so the total depends on the variable order in the
* census file -- confirm that only age-band counts lie in that range.
egen poppref=rowtotal(male_0-female_85) // total population of the retained row //
order poppref,after(county)
gen pop2030=malepref+femalepref
gen share2030=pop2030/poppref //share of age 20 to 30 group//

* 2/5 of the 10-14 band and 3/5 of the 60-64 band (ages 13-14 and 60-62) ...
gen pop1564_1=((male_10_14+female_10_14)/5)*2+((male_60_64+female_60_64)/5)*3 // 13 to 62
* ... plus the full bands in between.
* NOTE(review): this range is only the 15-59 bands of both sexes if male and
* female variables are interleaved by age band in the file -- confirm.
egen pop1564_2=rowtotal(male_15_19-female_55_59)
gen pop1564=pop1564_1+pop1564_2
gen share1564=pop1564/poppref //share of age 15 to 64 group//

* Prefecture id = first four characters of the code.
gen prefect=substr(p,1,4), after(code)
destring prefect, force replace

keep code prefect county srpref poppref pop2030 share2030 pop1564 share1564

* merge: keep only prefectures present in both the census and coredu02 *
merge 1:1 prefect using "$working_data\coredu02"
keep if _merge==3
drop _merge

save "$working_data\coredu02", replace // author's note: prefecture level, 54 Obs, 11 vars //

*———————————————————————— merge fine rate 2002 —————————————————————————————————
* 2002 wave: province-level means of fine, premium and bonus over two
* birth-year windows, plus a standardized version of the first fine mean.
use "$working_data\fine1", clear

* Window 1: birth years 1972-1982.
gen t1 = inrange(yr, 1972, 1982)
foreach v in fine premium bonus {
    bysort pv: egen a`v'1 = mean(`v') if t1==1
}

* Window 2: birth years 1979-1982.
gen d1 = inrange(yr, 1979, 1982)
foreach v in fine premium bonus {
    bysort pv: egen a`v'2 = mean(`v') if d1==1
}

* The window means are missing outside their window, so the default (mean)
* collapse returns the window mean for each province.
collapse (mean) afine1 apremium1 abonus1 afine2 apremium2 abonus2, by(pv)

* z-score of afine1 across the provinces in this file.
quietly summarize afine1
bysort pv: gen afinenorm1 = (afine1 - r(mean)) / r(sd)

/*qui sum afine2
bys pv: gen afinenorm2=(afine2 - r(mean))/r(sd) same as afinenorm1 */ 

save "$working_data\IV", replace

*———————————————————————— merge city info 2002 —————————————————————————————————
* 2002 wave: attach the city-yearbook variables and the province-level IV
* means to coredu02.
use "$citynew\temp02", clear

* Prefecture id = first four characters of the code.
tostring code, force replace
gen prefect = substr(code, 1, 4), after(code)
destring code prefect, force replace

* Align prefecture ids with those used in coredu02:
*   1100 -> 1101 (Beijing), 1411 -> 1423 (Lüliang Prefecture),
*   5000 -> 5001 (Chongqing)
recode prefect (1100 = 1101) (1411 = 1423) (5000 = 5001)
* without Yunnan Wenshan, Dali, Dehong *

* Drop yearbook-only rows; rows found only in coredu02 are kept.
* NOTE(review): the 2007 section keeps matched rows only -- confirm the
* difference is intended.
merge 1:1 prefect using "$working_data\coredu02", keep(match using) nogenerate

order cor_prefect srpref year, after(prefect)

* Province id = first two characters of the code.
tostring code, force replace
gen pv = substr(code, 1, 2), after(code)
destring code pv, force replace

save "$working_data\coredu02", replace // author's note: prefecture level, 54 Obs, 31 vars //

summarize

* Add the province-level IV variables; keep matched rows only.
merge m:1 pv using "$working_data\IV", keep(match) nogenerate

*order fine finenorm,after(year)

summarize

save "$working_data\coredu02", replace // author's note: 54 Obs, 42 vars //

**————————————————————————————————————2007————————————————————————————————————**
*———————————————————————————————————————————————————————————————————————————————
* 2007 wave: collapse the micro data in temp07 to one row per prefecture with
* the hedu/wedu correlation, the Gini of hhwage, log percentile gaps
* (90/10, 80/20) and the median of hhwage.
use "$working_data\temp07", clear

/* prefecture-level correlation between hedu and wedu */
tempfile f1
sort prefect, stable
save `f1'
* statsby replaces the data in memory with one row per prefect holding r(rho).
statsby cor_prefect=r(rho) , by(prefect) : corr hedu wedu
* Attach the correlation back to every micro record. The master (statsby
* output) has one row per prefect, so this is a 1:m match merge; it replaces
* the deprecated old-style "merge prefect using" call.
merge 1:m prefect using `f1', nogenerate

*———————————————————————————————— Inequality ———————————————————————————————————
* inequal() is not a built-in egen function; it must be installed separately.
egen gini = inequal(hhwage), by(prefect) index(gini) // gini //

* Detailed per-prefecture summary of hhwage for the log (shown once; the
* original repeated the identical command before each percentile pair).
bys prefect: sum hhwage, detail

    ** income by 90 percentile, 10 percentile **
bys prefect: egen wage10=pctile(hhwage), p(10)
bys prefect: egen wage90=pctile(hhwage), p(90)

gen wage9010 = log(wage90)-log(wage10) 

    ** income by 80 percentile, 20 percentile **
bys prefect: egen wage20=pctile(hhwage), p(20)
bys prefect: egen wage80=pctile(hhwage), p(80)

gen wage8020 = log(wage80)-log(wage20) 

    ** median income  **
bys prefect: egen mwage = pctile(hhwage), p(50)
	
* Every statistic generated above is constant within prefect, so (first)
* simply picks that value.
* NOTE(review): year is taken from the micro data and is assumed to be
* constant within prefect -- confirm.
collapse (first) cor_prefect year gini wage9010 wage8020 mwage, by(prefect) 
// author's note: 18 prefectures //

save "$working_data\coredu07", replace

*————————————————————————————— merge sex ratio 2007 ————————————————————————————
* 2007 wave: derive prefecture-level sex ratio and population shares from the
* 2010 census file and merge them onto coredu07.
use "$census\2010_county.dta", clear

* Keep only rows whose code has "00" in positions 5-6. The third argument of
* substr() is a length, not an end position, so 2 is the correct value (the
* original passed 6, which only worked because codes are 6 characters long).
* NOTE(review): presumably these are the prefecture-level rows -- confirm.
tostring code, gen(p)
keep if substr(p,5,2)=="00"

* The original ran the next two generate commands under "bys code:", whose
* only effect (assuming each code is unique) is to sort by code; do that
* explicitly.
sort code

* 2/5 of the 20-24 band + all of 25-29 + 4/5 of 30-34, i.e. roughly ages
* 23 to 33 in the census year if ages are spread evenly within a band.
gen malepref=((male_20_24/5)*2+male_25_29+(male_30_34/5)*4) //23 to 33//

gen femalepref=((female_20_24/5)*2+female_25_29+(female_30_34/5)*4) 

gen srpref=malepref/femalepref // male/female ratio of the cohort aged 20 to 30 in 2007 //

* NOTE(review): the varlist range male_0-female_85 takes every variable stored
* between those two names, so the total depends on the variable order in the
* census file -- confirm that only age-band counts lie in that range.
egen poppref=rowtotal(male_0-female_85) // total population of the retained row //
order poppref,after(county)
gen pop2030=malepref+femalepref
gen share2030=pop2030/poppref //share of age 20 to 30 group//

* 2/5 of the 15-19 band and 3/5 of the 65-69 band (ages 18-19 and 65-67) ...
gen pop1564_1=((male_15_19+female_15_19)/5)*2+((male_65_69+female_65_69)/5)*3
* ... plus the full bands in between.
* NOTE(review): this range is only the 20-64 bands of both sexes if male and
* female variables are interleaved by age band in the file -- confirm.
egen pop1564_2=rowtotal(male_20_24-female_60_64) // 18 to 67 //
gen pop1564=pop1564_1+pop1564_2
gen share1564=pop1564/poppref //share of age 15 to 64 group//

* Prefecture id = first four characters of the code.
gen prefect=substr(p,1,4), after(code)
destring prefect, force replace

keep code prefect county srpref poppref pop2030 share2030 pop1564 share1564

* Merge: keep only prefectures present in both the census and coredu07 *
merge 1:1 prefect using "$working_data\coredu07"
keep if _merge==3
drop _merge

save "$working_data\coredu07", replace // author's note: prefecture level, 11 vars, 18 Obs //

*———————————————————————————— merge fine rate 2007 —————————————————————————————
* 2007 wave: province-level means of fine, premium and bonus over two
* birth-year windows, plus a standardized version of the first fine mean.
use "$working_data\fine1", clear

* Window 1: birth years 1977-1987.
gen t2 = inrange(yr, 1977, 1987)
foreach v in fine premium bonus {
    bysort pv: egen a`v'1 = mean(`v') if t2==1
}

* Window 2: birth years 1979-1987.
gen d2 = inrange(yr, 1979, 1987)
foreach v in fine premium bonus {
    bysort pv: egen a`v'2 = mean(`v') if d2==1
}

* The window means are missing outside their window, so the default (mean)
* collapse returns the window mean for each province.
collapse (mean) afine1 apremium1 abonus1 afine2 apremium2 abonus2, by(pv)

* z-score of afine1 across the provinces in this file.
quietly summarize afine1
bysort pv: gen afinenorm1 = (afine1 - r(mean)) / r(sd)

* Overwrites the IV file written by the previous wave's section.
save "$working_data\IV", replace

*———————————————————————— merge city info 2007 —————————————————————————————————
* 2007 wave: attach the city-yearbook variables and the province-level IV
* means to coredu07.
use "$citynew\temp07", clear

* Prefecture id = first four characters of the code.
tostring code, force replace
gen prefect = substr(code, 1, 4), after(code)
destring code prefect, force replace

* Align prefecture ids with those used in coredu07:
*   3100 -> 3101 (Shanghai), 5000 -> 5001 (Chongqing)
recode prefect (3100 = 3101) (5000 = 5001)

* Keep only prefectures found in both the yearbook file and coredu07.
merge 1:1 prefect using "$working_data\coredu07", keep(match) nogenerate

order cor_prefect srpref year, after(prefect)

* Province id = first two characters of the code.
tostring code, force replace
gen pv = substr(code, 1, 2), after(code)
destring code pv, force replace

save "$working_data\coredu07", replace // author's note: prefecture level, 31 vars, 18 Obs //

summarize

* Add the province-level IV variables; keep matched rows only.
merge m:1 pv using "$working_data\IV", keep(match) nogenerate

*order fine finenorm,after(year)

summarize

save "$working_data\coredu07", replace // author's note: prefecture level, 18 Obs, 38 vars //


**———————————————————————————————————2013—————————————————————————————————————**
*——————————————————————————————————————————————————————————————————————————————*
* 2013 wave: collapse the micro data in temp13 to one row per prefecture with
* the hedu/wedu correlation, the Gini of hhwage, log percentile gaps
* (90/10, 80/20) and the median of hhwage.
use "$working_data\temp13", clear

/* prefecture-level correlation between hedu and wedu */
tempfile f1
sort prefect, stable
save `f1'
* statsby replaces the data in memory with one row per prefect holding r(rho).
statsby cor_prefect=r(rho) , by(prefect) : corr hedu wedu
* Attach the correlation back to every micro record. The master (statsby
* output) has one row per prefect, so this is a 1:m match merge; it replaces
* the deprecated old-style "merge prefect using" call.
merge 1:m prefect using `f1', nogenerate

*———————————————————————————————— Inequality ———————————————————————————————————
* inequal() is not a built-in egen function; it must be installed separately.
egen gini = inequal(hhwage), by(prefect) index(gini) // gini //

* Detailed per-prefecture summary of hhwage for the log (shown once; the
* original repeated the identical command before each percentile pair).
bys prefect: sum hhwage, detail

    ** income by 90 percentile, 10 percentile **
bys prefect: egen wage10=pctile(hhwage), p(10)
bys prefect: egen wage90=pctile(hhwage), p(90)

gen wage9010 = log(wage90)-log(wage10) 

    ** income by 80 percentile, 20 percentile **
bys prefect: egen wage20=pctile(hhwage), p(20)
bys prefect: egen wage80=pctile(hhwage), p(80)

gen wage8020 = log(wage80)-log(wage20) 

    ** median income  **
bys prefect: egen mwage = pctile(hhwage), p(50)
	
* Every statistic generated above is constant within prefect, so (first)
* simply picks that value.
* NOTE(review): year is taken from the micro data and is assumed to be
* constant within prefect -- confirm.
collapse (first) cor_prefect year gini wage9010 wage8020 mwage, by(prefect) 
// author's note: 94 prefectures //

save "$working_data\coredu13", replace

*————————————————————————————— merge sex ratio 2013 ————————————————————————————
* 2013 wave: derive prefecture-level sex ratio and population shares from the
* 2010 census file and merge them onto coredu13.
use "$census\2010_county.dta", clear

* Keep only rows whose code has "00" in positions 5-6. The third argument of
* substr() is a length, not an end position, so 2 is the correct value (the
* original passed 6, which only worked because codes are 6 characters long).
* NOTE(review): presumably these are the prefecture-level rows -- confirm.
tostring code, gen(p)
keep if substr(p,5,2)=="00"

* The original ran the next two generate commands under "bys code:", whose
* only effect (assuming each code is unique) is to sort by code; do that
* explicitly.
sort code

* 3/5 of the 15-19 band + all of 20-24 + 3/5 of 25-29, i.e. roughly ages
* 17 to 27 in the census year if ages are spread evenly within a band.
gen malepref=((male_15_19/5)*3+male_20_24+(male_25_29/5)*3) //17 to 27//

gen femalepref=((female_15_19/5)*3+female_20_24+(female_25_29/5)*3) 

gen srpref=malepref/femalepref // male/female ratio of the cohort aged 20 to 30 in 2013 //

* NOTE(review): the varlist range male_0-female_85 takes every variable stored
* between those two names, so the total depends on the variable order in the
* census file -- confirm that only age-band counts lie in that range.
egen poppref=rowtotal(male_0-female_85) // total population of the retained row //
order poppref,after(county)
gen pop2030=malepref+femalepref
gen share2030=pop2030/poppref //share of age 20 to 30 group//

* 3/5 of the 10-14 band and 2/5 of the 60-64 band (ages 12-14 and 60-61) ...
gen pop1564_1=((male_10_14+female_10_14)/5)*3+((male_60_64+female_60_64)/5)*2
* ... plus the full bands in between.
* NOTE(review): this range is only the 15-59 bands of both sexes if male and
* female variables are interleaved by age band in the file -- confirm.
egen pop1564_2=rowtotal(male_15_19-female_55_59) // 12 to 61 //
gen pop1564=pop1564_1+pop1564_2
gen share1564=pop1564/poppref //share of age 15 to 64 group//

* Prefecture id = first four characters of the code.
gen prefect=substr(p,1,4), after(code)
destring prefect, force replace

keep code prefect county srpref poppref pop2030 share2030 pop1564 share1564

* merge sex ratio 2013: keep only prefectures present in both files *
merge 1:1 prefect using "$working_data\coredu13"
keep if _merge==3
drop _merge

save "$working_data\coredu13", replace // prefecture level//

*———————————————————————— merge fine rate 2013 —————————————————————————————————
* 2013 wave: province-level means of fine, premium and bonus over the
* birth-year window, plus a standardized version of the first fine mean.
use "$working_data\fine1", clear

* Window 1: birth years 1983-1993.
gen t3 = inrange(yr, 1983, 1993)
foreach v in fine premium bonus {
    bysort pv: egen a`v'1 = mean(`v') if t3==1
}

* Window 2 uses the same years as window 1 in this wave, so the *2
* variables equal the *1 variables.
gen d3 = inrange(yr, 1983, 1993)
foreach v in fine premium bonus {
    bysort pv: egen a`v'2 = mean(`v') if d3==1
}

* The window means are missing outside their window, so the default (mean)
* collapse returns the window mean for each province.
collapse (mean) afine1 apremium1 abonus1 afine2 apremium2 abonus2, by(pv)

* z-score of afine1 across the provinces in this file.
quietly summarize afine1
bysort pv: gen afinenorm1 = (afine1 - r(mean)) / r(sd)

* Overwrites the IV file written by the previous wave's section.
save "$working_data\IV", replace

*———————————————————————— merge city info 2013 —————————————————————————————————
* 2013 wave: attach the city-yearbook variables and the province-level IV
* means to coredu13.
use "$citynew\temp13", clear

* Prefecture id = first four characters of the code.
tostring code, force replace
gen prefect = substr(code, 1, 4), after(code)
destring code prefect, force replace

* Align prefecture ids with those used in coredu13:
*   1100 -> 1101 (Beijing), 5000 -> 5001 (Chongqing)
recode prefect (1100 = 1101) (5000 = 5001)
* without Yunnan Wenshan, Dali, Dehong *

* Drop yearbook-only rows; rows found only in coredu13 are kept.
* NOTE(review): the 2007 section keeps matched rows only -- confirm the
* difference is intended.
merge 1:1 prefect using "$working_data\coredu13", keep(match using) nogenerate

order cor_prefect srpref year, after(prefect)

* Province id = first two characters of the code.
tostring code, force replace
gen pv = substr(code, 1, 2), after(code)
destring code pv, force replace

save "$working_data\coredu13", replace // author's note: prefecture level, 94 Obs, 33 vars //

* Add the province-level IV variables; keep matched rows only.
merge m:1 pv using "$working_data\IV", keep(match) nogenerate

*order fine finenorm,after(year)

save "$working_data\coredu13", replace // author's note: prefecture level, 94 Obs, 40 vars //

*———————————————————————————————————————————————————————————————————————————————
* Stack the three wave-level prefecture files (2002, 2007, 2013) into one
* dataset. Both later waves are appended in a single call and the result is
* saved once; the original saved an intermediate two-wave file under the same
* name and immediately overwrote it.
use "$working_data\coredu02", clear

append using "$working_data\coredu07" "$working_data\coredu13"

save "$working_data\coredu", replace

*—————————————————————————————— merge done —————————————————————————————————————
